Library necessary packages

library(rsvd)
library(Rtsne)
library(ggplot2)
library(cowplot)
library(sva)
library(igraph)
library(cccd)
library(KernSmooth)
library(beeswarm)
library(stringr)
library(reshape2)
library(pvclust)
library(NMF)

Load data

source("Fxns.R")
EEC_UMIs <- load_data("./Extend_data/GSE92332_EEC_UMIcounts.txt.gz")
## [1] "File exists!"
## 2018-01-04 00:28:09 INFO: Data dimensions: 14225x533
### whether need this step??
gene_expr <- as.numeric(apply(EEC_UMIs, 1, sum))
EEC_UMIs <- EEC_UMIs[which(gene_expr > 0), ]
EEC_tpm = data.frame(log2(1 + tpm(EEC_UMIs)))
## 2018-01-04 00:28:10 INFO: Running TPM normalisation

Select data

v = get.variable.genes(EEC_UMIs, min.cv2 = 100)
## 2018-01-04 00:28:17 INFO: Fitting only the 9114 genes with mean expression > 0.0393996247654784

## 2018-01-04 00:28:17 INFO: Found 2746 variable genes (p<0.05)
var.genes = as.character(rownames(v)[v$p.adj < 0.05])

Figure a

Pvclust

sample.names <- colnames(EEC_tpm)
region.names <- unlist(lapply(sample.names, function(x) return(str_split(x, 
    "_")[[1]][3])))
cell.type.names <- unlist(lapply(sample.names, function(x) return(str_split(x, 
    "_")[[1]][4])))
all.genes <- rownames(EEC_tpm)
# EEC.pv<-pvclust(as.data.frame(t(EEC_tpm[var.genes,])),method.hclust =
# 'ward.D2',method.dist = 'correlation', nboot = 10000,parallel = TRUE,quiet
# = FALSE) save(EEC.pv,file='EEC_pvclust.RData')
load("./Extend_data/EEC_pvclust.RData")
plot(EEC.pv)

Figure b

EEC.pca <- read.table("./Extend_data/EEC_pca_scores.txt")[, 1:11]

Heatmap_corr <- function(data, condition, all.condition) {
    cat(sprintf("There ara %d conditions\n", length(condition)))
    tpm <- data.frame()
    for (i in 1:length(condition)) {
        tpm <- rbind(tpm, t(data[, all.condition %in% condition[i]]))
    }
    # cat(sprintf('Whether creat data accurate %d
    # \n',sum(dim(tpm.data)[1]==dim(tpm)[1])))
    tpm <- data.frame(t(tpm))
    cat(sprintf("Whether creat data accurate %d \n", sum(dim(data)[1] == dim(tpm)[2])))
    ### create Condition
    Condition <- c()
    for (i in 1:length(condition)) {
        Condition <- c(Condition, rep(condition[i], sum(all.condition %in% condition[i])))
    }
    
    return(list(Condition, tpm))
}

EEC.pca.sort <- Heatmap_corr(data = t(EEC.pca), condition = unique(cell.type.names), 
    all.condition = cell.type.names)
## There ara 12 conditions
## Whether creat data accurate 0
EEC.corr <- cor(EEC.pca.sort[[2]], method = "pearson")

aheatmap(EEC.corr, Colv = NA, Rowv = NA, annCol = EEC.pca.sort[[1]], annRow = EEC.pca.sort[[1]])

Figure d

genes.d <- c("Vipr1", "Ffar2", "Gper1", "Gpr119", "Gpbar1", "Ptger3", "Galr1", 
    "Cnr1", "Ffar4", "Adgrd1", "Ffar1")

EEC.heatmap.d <- Heatmap_fun(genes = genes.d, tpm.data = EEC_tpm, condition = unique(cell.type.names), 
    all.condition = cell.type.names)
## There ara 12 conditions
## Whether creat data accurate 0
aheatmap(EEC.heatmap.d[[2]], Colv = NA, Rowv = NA, annCol = EEC.heatmap.d[[1]])